#
# marginalsPlot.R
#
# Plot marginals of example ANES questions.
#
# Hill, Seth J. and Chris Tausanovitch. "A Disconnect in Representation? Comparison of Trends in Congressional and Public Polarization."
#

#
# Identify years for long estimation.
#
library(foreign)
# Variable list. Only the columns VarNumber, VarLabel and VarDetails are used
# by this script.
vars <- read.csv("ANES_Cum_Vars_ToUse.csv", as.is = TRUE)
# 2014 release of ANES timeseries.
data <- read.dta("anes_timeseries_cdf.dta")
year <- data$VCF0004
yearSet <- c(1984:1996, 2000, 2004, 2008)
# Row mask reused for every extraction from `data` below.
inYearSet <- year %in% yearSet
# Subset down to only the variables we will use for scaling.
# drop = FALSE keeps a data frame even if a single column is selected.
smalldata <- data[inYearSet, names(data) %in% vars$VarNumber, drop = FALSE]
# Drop columns that are missing for every retained respondent.
toremove <- unname(which(
  vapply(smalldata, function(column) all(is.na(column)), logical(1))
))
# Guard the negative index: with nothing to remove, `-toremove` would be an
# empty index and select zero columns instead of all of them.
if (length(toremove) > 0) {
  smalldata <- smalldata[, -toremove, drop = FALSE]
}
# remove VCF9043a- this is an intensity question which was asked accidentally
# (a logical mask is a no-op when the column is absent, unlike -which(...)).
smalldata <- smalldata[, names(smalldata) != "VCF9043a", drop = FALSE]
# Replace the variable codes with their labels from the variable list.
names(smalldata) <- vars$VarLabel[match(names(smalldata), vars$VarNumber)]
smalldata$year <- year[inYearSet]
smalldata$weight <- data[inYearSet, "VCF0009"]
smalldata[["7-pt Scale Party ID"]] <- data[inYearSet, "VCF0301"]
smalldata[["R Placement: Liberal-Conservative Scale"]] <- data[inYearSet, "VCF0803"]

# Create a question map.
returnYears <- function(VarDetails) {
  # Return a pipe-separated, sorted list of the distinct four-digit years
  # (19xx or 20xx) that appear immediately before a colon in VarDetails.
  #
  # Args:
  #   VarDetails: a single character string.
  #
  # Returns:
  #   A length-one character vector such as "1984|1988", or "" when the text
  #   contains no "<year>:" token.
  reg <- gregexpr("19[0-9]{2}:|20[0-9]{2}:", VarDetails)[[1]]
  # gregexpr() reports "no match" as -1 (and NA for missing input); using
  # those as substring offsets would return the first characters of the text
  # as if they were a year, so keep genuine match positions only.
  starts <- reg[!is.na(reg) & reg > 0]
  if (length(starts) == 0) {
    return("")
  }
  # Each match is "YYYY:"; take the four digits and leave the colon.
  out <- substring(VarDetails, starts, starts + 3L)
  paste(sort(unique(out)), collapse = "|")
}

# For every variable, derive the pipe-separated string of years found in its
# details text and store it alongside the variable.
vars$sources <- sapply(X = vars$VarDetails, FUN = returnYears)
# Reorder the rows by that year string, so questions whose earliest year comes
# first sort to the top.
vars <- vars[order(vars$sources, decreasing = FALSE), ]
makePoints <- function(yearString,y=1,pt.cex=.8) {
  # Append one (x = year, y = y) row per year in `yearString` to the
  # `allpoints` data frame that lives outside this function.
  #
  # Args:
  #   yearString: pipe-separated years, e.g. "1984|1988".
  #   y:          value stored in the y column of every appended row.
  #   pt.cex:     accepted but not used by the body.
  year_tokens <- strsplit(yearString, "\\|")[[1]]
  question_rows <- data.frame(
    x = as.numeric(year_tokens),
    y = rep(y, length(year_tokens))
  )
  # Superassignment: the caller's `allpoints` is updated in place.
  allpoints <<- rbind(allpoints, question_rows)
}

# Build one (year, question index) point for every year attached to each
# question; makePoints() appends its rows to the global `allpoints`.
allpoints <- data.frame(x = c(), y = c())
# seq_len() so that an empty `vars` yields no iterations (1:0 would run twice).
for (i in seq_len(nrow(vars))) {
  makePoints(yearString = vars[i, "sources"], y = i)
}

# Keep only the points that fall in the study years.
somepoints <- allpoints[allpoints$x %in% yearSet, ]

# Distinct study years that occur for at least one question. Defined before
# the loop so that `years` exists even when no question qualifies; later code
# reads it.
years <- unique(somepoints$x)
numyears <- length(years)

# Retain a question only if it has a point in every one of those years.
finalpoints <- data.frame(x = c(), y = c())
for (i in unique(somepoints$y)) {
  numquestions <- sum(somepoints$y == i)
  if (numyears == numquestions) {
    finalpoints <- rbind(
      finalpoints,
      data.frame(x = years, y = rep(i, numyears))
    )
  }
}

# Labels of the questions retained in `finalpoints`.
itemnames <- vars$VarLabel[unique(finalpoints$y)]
# Select appropriate subset of variables and years.
# drop = FALSE keeps a data frame even when only one item matches.
datasubset <- smalldata[, names(smalldata) %in% itemnames, drop = FALSE]
# Tabulate by year.
# Number of leading response categories that form the "low" side of each
# item's dichotomy (eyeballed medians). The values are matched to `itemnames`
# by position, so the two must have the same length and order.
cut.categories <- c(1, 4, 4, 2, 3, 4, 3, 1, 1, 2)
if (length(itemnames) != length(cut.categories)) {
  stop("Expected ", length(cut.categories), " items for the hard-coded cut ",
       "categories but found ", length(itemnames), ".", call. = FALSE)
}
item.cuts <- data.frame(cut.cat = cut.categories, row.names = itemnames)
# One proportion column per study year, filled in later. The years are taken
# from `somepoints` directly rather than from a variable left behind by an
# earlier loop.
item.years <- unique(somepoints$x)
item.cuts[, sprintf("p.%s", item.years)] <- NA
killNADK <- function(tab, DK = TRUE) {
  # Drop non-substantive response columns from a two-way table.
  #
  # Args:
  #   tab: a two-dimensional table/matrix whose column names are response
  #        labels.
  #   DK:  if TRUE (default), also drop columns whose label contains "DK".
  #
  # Returns:
  #   `tab` without the columns whose label contains "NA" or "not sure",
  #   contains "DK" (when DK is TRUE), or is exactly "0" or "9". The result
  #   always keeps two dimensions.
  labels <- colnames(tab)
  keep <- !grepl("NA|not sure", labels)
  if (DK) {
    keep <- keep & !grepl("DK", labels)
  }
  keep <- keep & !(labels %in% c("0", "9"))
  # drop = FALSE: without it a table left with one column (or one row)
  # collapses to a vector and any further column indexing fails.
  tab[, keep, drop = FALSE]
}
# For each retained item: print its cleaned response counts, then store, per
# year, the share of respondents in the first `cut.cat` response categories.
for (item in itemnames) {
  cat("\n",item,"\n ")
  tab <- killNADK(table(smalldata$year, datasubset[, item]))
  print(colSums(tab))
  # Tabulate dichotomized values by year.
  n.low <- item.cuts[item, "cut.cat"]
  row.props <- prop.table(tab, margin = 1)
  p.cols <- sprintf("p.%s", rownames(tab))
  if (n.low != 1) {
    # Several categories below the cut: add their within-year shares.
    item.cuts[item, p.cols] <- rowSums(row.props[, 1:n.low])
  } else {
    # A single category below the cut: its share is the answer.
    item.cuts[item, p.cols] <- row.props[, 1]
  }
}

# Add in pid7 and ideo5.
itemnames <- c(itemnames, "7-pt Scale Party ID", "R Placement: Liberal-Conservative Scale")
# drop = FALSE keeps a data frame even if only one item matches.
datasubset <- smalldata[, names(smalldata) %in% itemnames, drop = FALSE]
# One weighted year-by-response mosaic plot per item, laid out on a three-row
# grid in a single PDF page.
pdf("graphs/marginals2.pdf", width = 10, height = 7)
# The `finally` clause closes the PDF device even when an item fails, so an
# error does not leave the device open.
invisible(tryCatch({
  par(mfrow = c(3, ceiling((length(itemnames)) / 3)),
      mar = c(0.6, 2.1, 2.1, 0.1), oma = c(0, 0, 0, 0))
  for (item in itemnames) {
    cat("\n",item,"\n ")
    x <- as.character(datasubset[, item])
    vals <- unique(x)
    # Response labels to leave out of the cross-tabulation.
    exclude.vals <- vals[regexpr("NA|not sure", vals) != -1 | vals == "0" | vals == "9"]
    if (item == "R Opinion:  Federal Spending- Social Security") {
      exclude.vals <- c(exclude.vals, "7. Cut out entirely (volunteered)")
    }
    # Weighted counts: survey weight summed within year x response.
    tab <- xtabs(smalldata$weight ~ smalldata$year + x, exclude = exclude.vals)
    # Shorten variables: two-digit year, first two characters of the response.
    dimnames(tab)[[1]] <- substr(dimnames(tab)[[1]], 3, 4)
    dimnames(tab)[[2]] <- substr(dimnames(tab)[[2]], 1, 2)
    mosaicplot(tab, xlab = "", ylab = "", main = "", color = TRUE, las = 1)
    # NOTE(review): cleanTitle() is not defined in the visible part of this
    # file; confirm it is defined or sourced before this script runs.
    title(main = cleanTitle(item), cex.main = .9)
  }
}, finally = dev.off()))
